import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn import cluster
from yellowbrick.cluster import KElbowVisualizer
from sklearn.preprocessing import normalize
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics import silhouette_score
from sklearn.cluster import MeanShift, estimate_bandwidth, KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import v_measure_score
from sklearn.preprocessing import normalize
import scipy.cluster.hierarchy as shc
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras import layers
from keras.layers import BatchNormalization
from keras import models
from keras import optimizers
from keras import callbacks
from keras.layers import Dropout
from sklearn import metrics
from sklearn.metrics import classification_report
import math
from sklearn.metrics import mean_squared_error
# Notebook/Colab setup: inline plots and Google Drive mount for the data file.
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')
# Load the reshaped smart-meter data; 'Datetime' parsed day-first.
# Presumably rows are timestamps and columns are customer IDs -- TODO confirm.
dfGC = pd.read_csv('/content/drive/My Drive/Final thesis/reshapedGC.csv', parse_dates = ['Datetime'], dayfirst = True,na_filter=False)
# Derived time features: minute, fractional hour of day, and a weekday flag.
dfGC['minute'] = dfGC['Datetime'].apply(lambda x: x.minute)
dfGC['hour'] = dfGC['Datetime'].apply(lambda x: x.hour + x.minute/60)
dfGC['weekday'] = dfGC['Datetime'].apply(lambda x: x.weekday() < 5)
# Split into weekday (Mon-Fri) and weekend subsets.
dfGCwk = dfGC[dfGC.weekday == True]
dfGCwkn = dfGC[dfGC.weekday == False]
import calendar # To have human-readable names for the days of the week
dfGCwk['Datetime'].apply(lambda x: calendar.day_name[x.weekday()]).value_counts()
# Average daily profile per customer, indexed by fractional hour (weekdays).
dfGCwk_dpat = dfGCwk.groupby(by=dfGCwk.hour).mean()
dfGCwk_dpat = dfGCwk_dpat.drop(columns = ['minute','weekday'])
dfGCwk_dpat.head()
# Customer-as-row matrix for clustering: one row per customer, one column
# per timestamp (the transpose of the per-timestamp means), weekdays.
X_wk = dfGCwk.groupby(by=dfGCwk.Datetime).mean()
X_wk = X_wk.drop(columns = ['minute','hour','weekday'])
X_wk = X_wk.transpose()
X_wk.head()
# Same two aggregations for the weekend subset.
dfGCwkn_dpat = dfGCwkn.groupby(by=dfGCwkn.hour).mean()
dfGCwkn_dpat = dfGCwkn_dpat.drop(columns = ['minute','weekday'])
dfGCwkn_dpat.head()
X_wkn = dfGCwkn.groupby(by=dfGCwkn.Datetime).mean()
X_wkn = X_wkn.drop(columns = ['minute','hour','weekday'])
X_wkn = X_wkn.transpose()
X_wkn.head()
# Major tick every 6 hours on the hour-of-day axis.
loc_6h = mpl.ticker.MultipleLocator(6)
# Weekday daily profiles: one faint line per customer plus the overall average.
ax = dfGCwk_dpat.plot(color='k', alpha=0.15, lw=0.5)
dfGCwk_dpat.mean(axis=1).plot(color='tab:green', lw=3, alpha=0.7)
ax.xaxis.set_major_locator(loc_6h)
plt.legend(ax.lines[-2:], ['1 customer', 'average'])
ax.set(
    xlabel='hour of day',
    ylabel='power (kW)',
    ylim=(-0.1, 3.5),
    title='Daily consumption over 2012-2013'
);
plt.tight_layout()
# Weekend daily profiles (same layout as above).
ax = dfGCwkn_dpat.plot(color='k', alpha=0.15, lw=0.5)
# Bug fix: overlay the WEEKEND average here -- the original plotted the
# weekday average (dfGCwk_dpat) on the weekend figure.
dfGCwkn_dpat.mean(axis=1).plot(color='tab:green', lw=3, alpha=0.7)
ax.xaxis.set_major_locator(loc_6h)
plt.legend(ax.lines[-2:], ['1 customer', 'average'])
ax.set(
    xlabel='hour of day',
    ylabel='power (kW)',
    ylim=(-0.1, 3.5),
    title='Daily consumption over 2012-2013'
);
plt.tight_layout()
def elbow_method_wk(dataframe, min_k, max_k):
    """Fit K-means for every k in [min_k, max_k] and plot the elbow curve.

    Parameters
    ----------
    dataframe : array-like of shape (n_samples, n_features)
        Data to cluster (rows are samples).
    min_k, max_k : int
        Inclusive range of cluster counts to evaluate.

    Returns
    -------
    tuple(list, list)
        The KMeans estimator objects and their fitted results, one per k.
    """
    # Try all k's between min_k and max_k
    cluster_nums = range(min_k, max_k + 1)
    k_mean_algs = [cluster.KMeans(n_clusters = k) for k in cluster_nums]
    k_mean_res = [alg.fit(dataframe) for alg in k_mean_algs]
    # Centroids for each value of k
    centroids = [res.cluster_centers_ for res in k_mean_res]
    # Euclidean distance from every sample to every centroid
    distances = [cdist(dataframe, centroid, 'euclidean') for centroid in centroids]
    # Distance to each sample's closest centroid
    min_distances = [np.min(distance, axis = 1) for distance in distances]
    # Average squared distance per sample.
    # Bug fix: normalise by the rows of `dataframe`, not the global X_wk,
    # so the function works for any input.
    avg_sum_squares = [sum(dist ** 2) / dataframe.shape[0] for dist in min_distances]
    # Plot the elbow graph
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(cluster_nums, avg_sum_squares, 'b*-', color='tab:green')
    plt.grid(True)
    plt.xlabel('Number of clusters')
    plt.ylabel('Average sum of squares within cluster')
    plt.show()
    return (k_mean_algs, k_mean_res)
def elbow_method_wkn(dataframe, min_k, max_k):
    """Fit K-means for every k in [min_k, max_k] and plot the elbow curve.

    Identical to elbow_method_wk; kept as a separate name for the weekend
    workflow.  Returns (estimators, fitted results), one entry per k.
    """
    # Try all k's between min_k and max_k
    cluster_nums = range(min_k, max_k + 1)
    k_mean_algs = [cluster.KMeans(n_clusters = k) for k in cluster_nums]
    k_mean_res = [alg.fit(dataframe) for alg in k_mean_algs]
    # Centroids for each value of k
    centroids = [res.cluster_centers_ for res in k_mean_res]
    # Euclidean distance from every sample to every centroid
    distances = [cdist(dataframe, centroid, 'euclidean') for centroid in centroids]
    # Distance to each sample's closest centroid
    min_distances = [np.min(distance, axis = 1) for distance in distances]
    # Average squared distance per sample.
    # Bug fix: normalise by the rows of `dataframe`, not the global X_wkn.
    avg_sum_squares = [sum(dist ** 2) / dataframe.shape[0] for dist in min_distances]
    # Plot the elbow graph
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(cluster_nums, avg_sum_squares, 'b*-', color='tab:green')
    plt.grid(True)
    plt.xlabel('Number of clusters')
    plt.ylabel('Average sum of squares within cluster')
    plt.show()
    return (k_mean_algs, k_mean_res)
# Run the elbow search for k = 2..12 on both matrices.
k_algs, k_res = elbow_method_wk(X_wk,2,12)
k_algs, k_res = elbow_method_wkn(X_wkn,2,12)
# Cross-check with yellowbrick's KElbowVisualizer (weekdays).
km_wk = KMeans().fit(X_wk)
pd.Series.value_counts(km_wk.labels_)
visualizer = KElbowVisualizer(km_wk, k=(2,12), timings=False)
visualizer.fit(X_wk)
visualizer.show()
# Same for weekends.
km_wkn = KMeans().fit(X_wkn)
pd.Series.value_counts(km_wkn.labels_)
visualizer = KElbowVisualizer(km_wkn, k=(2,12), timings=False)
visualizer.fit(X_wkn)
visualizer.show()
# Timestamp-indexed views (rows = timestamps, columns = customers) used for
# the v-measure / silhouette scoring below.  The first assignment on each
# pair is redundant -- it is immediately overwritten by the drop().
X_wk_v = dfGCwk
X_wk_v = dfGCwk.drop(columns = ['minute','hour','weekday'])
X_wk_v = X_wk_v.groupby(X_wk_v.Datetime).mean()
len(X_wk_v)
X_wkn_v = dfGCwkn
X_wkn_v = dfGCwkn.drop(columns = ['minute','hour','weekday'])
X_wkn_v = X_wkn_v.groupby(X_wkn_v.Datetime).mean()
len(X_wkn_v)
X_wk_v.head()
# V-measure comparison of KMeans with k = 2..8 on the weekday timestamp view.
# NOTE(review): X_wk_v['3'] is customer column '3' used as the "true"
# labelling -- this scores agreement against one customer's raw readings,
# which is of doubtful validity as ground truth; confirm intent.
v_scores = []
N_Clusters = [2, 3, 4, 5, 6, 7, 8]
km_2_wk = KMeans(2).fit(X_wk_v)
True_lables1 = km_2_wk.labels_
labels1 = km_2_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels1))
km_3_wk = KMeans(3).fit(X_wk_v)
True_lables2 = km_3_wk.labels_
labels2 = km_3_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels2))
km_4_wk = KMeans(4).fit(X_wk_v)
True_lables3 = km_4_wk.labels_
labels3 = km_4_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels3))
km_5_wk = KMeans(5).fit(X_wk_v)
True_lables4 = km_5_wk.labels_
labels4 = km_5_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels4))
km_6_wk = KMeans(6).fit(X_wk_v)
True_lables5 = km_6_wk.labels_
labels5 = km_6_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels5))
km_7_wk = KMeans(7).fit(X_wk_v)
True_lables6 = km_7_wk.labels_
labels6 = km_7_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels6))
km_8_wk = KMeans(8).fit(X_wk_v)
True_lables7 = km_8_wk.labels_
# Bug fix: the original predicted with km_7_wk here, so the k=8 model was
# never actually scored.
labels7 = km_8_wk.predict(X_wk_v)
v_scores.append(v_measure_score(X_wk_v['3'], labels7))
# Plotting a Bar Graph to compare the models
plt.bar(N_Clusters, v_scores)
plt.xlabel('Number of Clusters')
plt.ylabel('V-Measure Score')
plt.title('Comparison of different Clustering Models')
plt.show()
# Silhouette scores (higher is better) for the candidate k values.
print('k = 3 the score is ',silhouette_score(X_wk_v, km_3_wk.labels_, metric = 'euclidean'),'\n',
      'k = 4 the score is ',silhouette_score(X_wk_v, km_4_wk.labels_, metric = 'euclidean'),'\n',
      'k = 5 the score is ',silhouette_score(X_wk_v, km_5_wk.labels_, metric = 'euclidean'),'\n',
      'k = 6 the score is ',silhouette_score(X_wk_v, km_6_wk.labels_, metric = 'euclidean'),'\n',
      'k = 7 the score is ',silhouette_score(X_wk_v, km_7_wk.labels_, metric = 'euclidean'))
# Same v-measure comparison for the weekend view (k = 2..8).
# NOTE(review): X_wkn_v['1'] is customer column '1' used as "true" labels --
# same caveat as the weekday version; confirm intent.
v_scores = []
N_Clusters = [2, 3, 4, 5, 6, 7, 8]
km_2_wkn = KMeans(2).fit(X_wkn_v)
True_lables1 = km_2_wkn.labels_
labels1 = km_2_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels1))
km_3_wkn = KMeans(3).fit(X_wkn_v)
True_lables2 = km_3_wkn.labels_
labels2 = km_3_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels2))
km_4_wkn = KMeans(4).fit(X_wkn_v)
True_lables3 = km_4_wkn.labels_
labels3 = km_4_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels3))
km_5_wkn = KMeans(5).fit(X_wkn_v)
True_lables4 = km_5_wkn.labels_
labels4 = km_5_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels4))
km_6_wkn = KMeans(6).fit(X_wkn_v)
True_lables5 = km_6_wkn.labels_
labels5 = km_6_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels5))
km_7_wkn = KMeans(7).fit(X_wkn_v)
True_lables6 = km_7_wkn.labels_
labels6 = km_7_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels6))
km_8_wkn = KMeans(8).fit(X_wkn_v)
True_lables7 = km_8_wkn.labels_
# Bug fix: the original predicted with km_7_wkn here, so the k=8 model was
# never actually scored.
labels7 = km_8_wkn.predict(X_wkn_v)
v_scores.append(v_measure_score(X_wkn_v['1'], labels7))
# Plotting a Bar Graph to compare the models
plt.bar(N_Clusters, v_scores)
plt.xlabel('Number of Clusters')
plt.ylabel('V-Measure Score')
plt.title('Comparison of different Clustering Models')
plt.show()
# Silhouette scores (higher is better) for the candidate k values.
print('k = 3 the score is ',silhouette_score(X_wkn_v, km_3_wkn.labels_, metric = 'euclidean'),'\n',
      'k = 4 the score is ',silhouette_score(X_wkn_v, km_4_wkn.labels_, metric = 'euclidean'),'\n',
      'k = 5 the score is ',silhouette_score(X_wkn_v, km_5_wkn.labels_, metric = 'euclidean'),'\n',
      'k = 6 the score is ',silhouette_score(X_wkn_v, km_6_wkn.labels_, metric = 'euclidean'),'\n',
      'k = 7 the score is ',silhouette_score(X_wkn_v, km_7_wkn.labels_, metric = 'euclidean'))
%%time
km_3_wk = KMeans(3).fit(X_wk)
pd.Series.value_counts(km_3_wk.labels_)
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[0], alpha=0.7, lw=1, color = 'blue',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[1], alpha=0.7, lw=1, color = 'green',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[2], alpha=0.7, lw=1, color = 'red',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.figure(figsize=(7,5))
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[2],alpha=0.8,lw = 1,color = 'red')
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[1],alpha=0.8,lw = 1,color = 'green')
plt.plot(X_wk.columns, km_3_wk.cluster_centers_[0], alpha=0.8, lw=1, color = 'blue')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('All Clusters overlapping')
%%time
km_4_wkn = KMeans(4).fit(X_wkn)
pd.Series.value_counts(km_4_wkn.labels_)
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[0], alpha=0.7, lw=1, color = 'blue',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[1], alpha=0.7, lw=1, color = 'green',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[2], alpha=0.7, lw=1, color = 'red',linestyle='dotted')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[3], alpha=0.7, lw=1, color = 'purple')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('Cluster 0')
plt.figure(figsize=(10,5))
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[0], alpha=0.9, lw=3, color = 'blue')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[2], alpha=0.9, lw=3, color = 'red')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[1], alpha=0.9, lw=3, color = 'green')
plt.plot(X_wkn.columns, km_4_wkn.cluster_centers_[3], alpha=0.9, lw=3, color = 'purple')
plt.gca()
plt.tight_layout()
#plt.legend()
plt.xlabel('Timeline')
plt.ylabel('Energy Consumption (kW)')
plt.title('All Clusters overlapping')
# Adjusted Rand index between the model's own training labels and predict()
# on the same data.
actual_clus = km_3_wk.labels_
predicted_clus = km_3_wk.predict(X_wk)
# NOTE(review): labels_ and predict(training data) coincide for KMeans, so
# this score is trivially 1.0 -- it does not validate the clustering; it only
# sanity-checks predict().
adjusted_rand_score(actual_clus, predicted_clus)
#####Perfect labeling would be scored 1 and bad labelling or independent labelling is scored 0 or negative.
######### specify
######### t_hod = dfGCwk_dpat.index
# X values used by plot_daily_cluster below.  A scalar, not an index --
# the commented line above suggests dfGCwk_dpat.index was intended; verify.
t_hod = 12480
def plot_daily_cluster(cl, X, **ax_kwarg):
    '''Plot the member curves and centres of each cluster of classifier `cl`
    over data `X`; extra keyword args are forwarded to Axes.set.'''
    labels = cl.labels_
    groups = np.unique(labels)
    # Member curves, tinted by cluster (a negative label, i.e. noise, is black).
    for grp in groups:
        tint = 'C{}'.format(grp) if grp >= 0 else 'black'
        plt.plot(t_hod, X.iloc[labels == grp, :].T,
                 color=tint, alpha=0.3, lw=0.5)
    # Cluster centres, each legend entry carrying the member count.
    for grp in groups:
        if grp >= 0:
            member_count = np.sum(labels == grp)
            plt.plot(t_hod, cl.cluster_centers_[grp],
                     label='{} ({})'.format(grp, member_count),
                     color='C{}'.format(grp), lw=3)
    ax = plt.gca()
    ax.set(xlim=(0, 24), xlabel='hour of day')
    ax.xaxis.set_major_locator(loc_6h)
    ax.set(**ax_kwarg)
    plt.tight_layout()
    plt.legend()
# Suggested bandwidths for mean-shift on each matrix.
estimate_bandwidth(X_wk.values, quantile=0.5)
estimate_bandwidth(X_wkn.values, quantile=0.5)
#######Mean shift clustering (long time to compute)
%%time
ms_wk = MeanShift(bandwidth=75, cluster_all=True).fit(X_wk)
pd.Series.value_counts(ms_wk.labels_)
#ms_wkn = MeanShift(bandwidth=8, cluster_all=True).fit(X_wkn)
#pd.Series.value_counts(ms.labels_)
#########Hierachichal CLustering (Comparitively long to kmeans)
%%time
hc_wk = AgglomerativeClustering(n_clusters = 3, affinity = 'euclidean', linkage ='ward')
X_hc_wk=hc_wk.fit(X_wk)
pd.Series.value_counts(X_hc_wk.labels_)
%%time
hc_wkn = AgglomerativeClustering(n_clusters = 4, affinity = 'euclidean', linkage ='ward')
X_hc_wkn = hc_wkn.fit(X_wkn)
pd.Series.value_counts(X_hc_wkn.labels_)
# Attach the final KMeans labels to each customer row.
cls_list = km_3_wk.labels_
X_wk['cluster'] = cls_list
cls_list = km_4_wkn.labels_
X_wkn['cluster'] = cls_list
print(X_wk['cluster'].value_counts(), '\n',
      X_wkn['cluster'].value_counts())
# Per-cluster customer subsets.
X_wk_C1 = X_wk[X_wk.cluster == 0]
X_wk_C2 = X_wk[X_wk.cluster == 1]
X_wk_C3 = X_wk[X_wk.cluster == 2]
#X_wk_C4 = X_wk[X_wk.cluster == 3]
#X_wk_C5 = X_wk[X_wk.cluster == 4]
X_wkn_C1 = X_wkn[X_wkn.cluster == 0]
X_wkn_C2 = X_wkn[X_wkn.cluster == 1]
X_wkn_C3 = X_wkn[X_wkn.cluster == 2]
X_wkn_C4 = X_wkn[X_wkn.cluster == 3]
# Forecasting prep for weekday cluster 0: timestamps as rows, customers as
# columns, raw numpy values.
X_wk_C1 = X_wk_C1.drop(columns = 'cluster')
X_wk_C1 = X_wk_C1.transpose()
X_wk_C1.head()
X_wk_C1 = X_wk_C1.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap extreme values at the 97th percentile.
cap = np.percentile(X_wk_C1, 97)
X_wk_C1[X_wk_C1 > cap] = cap
# Chronological 80/20 train/test split (no shuffling).
training_size=int(X_wk_C1.shape[0]*0.80)
test_size=(X_wk_C1.shape[0])-training_size
train,test=X_wk_C1[0:training_size],X_wk_C1[training_size:(X_wk_C1.shape[0])]
#Slidind Window
#convert an array of values into a dataset matrix
#def create_dataset(dataset, time_step=1):
#    dataX, dataY = [], []
#    for i in range(len(dataset)-time_step-1):
#        a = dataset[i:(i+time_step),] ###i=0, 0,1,2,3---- #
#        dataX.append(a)
#        dataY.append(dataset[i + time_step,])
#    return np.array(dataX), np.array(dataY)
#trainX, trainY = create_dataset(train, time_step)
#testX,testY = create_dataset(test, time_step)
#valX, valY = create_dataset(val, time_step)
# reshape into X=t,t+1,t+2,t+3 and Y=t+4
#time_step = 48
def get_batches(data, input_interval, target_interval, output_step_offset):
    """Build sliding-window (X, Y) pairs from a time-ordered array.

    For each anchor position i, X is the `input_interval` steps ending at i
    and Y is the `target_interval` steps starting `output_step_offset` after i.
    Returns two numpy arrays of matching first dimension.
    """
    stop = len(data) - target_interval - output_step_offset
    anchors = range(input_interval, stop)
    window_x = [data[i - input_interval:i] for i in anchors]
    window_y = [data[i + output_step_offset:i + output_step_offset + target_interval]
                for i in anchors]
    return np.array(window_x), np.array(window_y)
# Windows: 48-step input, 48-step target, offset 48 (i.e. predict the next
# day's 48 half-hour steps from the previous day -- assumes 48 steps/day,
# TODO confirm sampling interval).
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense baseline model for weekday cluster 0 (applied per timestep).
Model_1 = models.Sequential()
Model_1.add(layers.Dense(400, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_1.add(Dropout(.01))
Model_1.add(BatchNormalization())
Model_1.add(Dense(200))
Model_1.add(Dropout(.01))
Model_1.add(BatchNormalization())
Model_1.add((Dense(trainX.shape[2])))
Model_1.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_1.summary()
model_train = Model_1.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size=64)
# In-sample and out-of-sample predictions.
Seq_train = Model_1.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_1.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
def rmse(actual, pred):
    """Root-mean-square error between two equally shaped arrays."""
    squared_err = (pred - actual) ** 2
    return np.sqrt(np.mean(squared_err))
def mae(actual, pred):
    """Mean absolute error between two equally shaped arrays."""
    abs_err = np.abs(actual - pred)
    return abs_err.mean()
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# Test-set RMSE/MAE evaluated on output timestep 1 only, across all customers.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmap comparison: actual vs predicted for the first 48 windows at step 47.
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
# Actual vs prediction line plot for customer index 1 at output step 47.
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Exponentially decaying learning rate: 0.001 * 0.8^epoch.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
# LSTM model for weekday cluster 0 (return_sequences so the Dense head
# produces one 48-wide output per timestep).
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape =(trainX.shape[1],trainX.shape[2]), return_sequences = True))
#model.add(Dropout(.2))
#model.add(LSTM(100, activation='relu', return_sequences=True))
#model.add(Dropout(.2))
model.add(Dense(trainX.shape[2]))
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9), 'optimizers.Adam(lr=0.01)'
model.compile(optimizer = optimizers.Adam(lr=0.001), metrics = 'mae', loss='mse')
model.summary()
# Redundant re-creation of the same scheduler defined above.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
# fit model
history_A_C1 = model.fit(trainX, trainY,
                         epochs=30,
                         validation_split = 0.05,
                         batch_size = 64,
                         callbacks=[lr_decay])
# Training vs validation loss curves.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_A_C1.history['loss'], label='train')
plt.plot(history_A_C1.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
# Predictions and scores (RMSE/MAE at output timestep 1).
testingtrain_C1 = model.predict(trainX, verbose = 1)
print(testingtrain_C1[0], testingtrain_C1.shape)
testingtest_C1 = model.predict(testX, verbose = 1)
print(testingtest_C1[0], testingtest_C1.shape)
trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_C1[:,1,:]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C1[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_C1[:,1,:]))
print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_C1[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Duplicate re-definitions of rmse/mae (identical to the earlier versions);
# kept as in the original notebook.
def rmse(actual, pred):
    # Root-mean-square error.
    return np.sqrt(((pred - actual) ** 2).mean())
def mae(actual, pred):
    # Mean absolute error.
    return np.mean(np.abs(actual - pred))
# Heatmaps of actual vs predicted training windows at output step 47.
plt.imshow(trainY[:47,47,:])
plt.show()
plt.imshow(testingtrain_C1[:47,47,:])
plt.show()
# Actual vs prediction for customer 0 at output step 1 over the test windows.
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_C1[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Forecasting prep for weekday cluster 1 (same pipeline as cluster 0).
X_wk_C2 = X_wk_C2.drop(columns = 'cluster')
X_wk_C2 = X_wk_C2.transpose()
X_wk_C2.head()
X_wk_C2 = X_wk_C2.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap extreme values at the 97th percentile, then 80/20 chronological split.
cap = np.percentile(X_wk_C2, 97)
X_wk_C2[X_wk_C2 > cap] = cap
training_size=int(X_wk_C2.shape[0]*0.80)
test_size=(X_wk_C2.shape[0])-training_size
train,test=X_wk_C2[0:training_size],X_wk_C2[training_size:(X_wk_C2.shape[0])]
# Duplicate re-definition of get_batches, identical to the one above;
# kept as in the original notebook.
def get_batches(data, input_interval, target_interval, output_step_offset):
    # Sliding-window builder: X is `input_interval` steps ending at i,
    # Y is `target_interval` steps starting `output_step_offset` after i.
    batched_data_x = []
    batched_data_y = []
    for i in range(input_interval, (len(data) - target_interval - output_step_offset)):
        batched_data_x.append(data[i-input_interval:i])
        batched_data_y.append(data[i+output_step_offset:i+target_interval+output_step_offset])
    batched_data_x, batched_data_y = np.array(batched_data_x), np.array(batched_data_y)
    return batched_data_x, batched_data_y
# Windows for weekday cluster 1 (same 48/48/48 setup).
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense baseline for weekday cluster 1.
Model_2 = models.Sequential()
Model_2.add(layers.Dense(300, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_2.add(Dropout(0.2))
Model_2.add(BatchNormalization())
Model_2.add(layers.Dense(100, activation='relu'))
Model_2.add(Dropout(0.2))
Model_2.add(BatchNormalization())
Model_2.add((Dense(trainX.shape[2])))
Model_2.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_2.summary()
model_train = Model_2.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size=64)
Seq_train = Model_2.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_2.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# NOTE(review): unlike the other sections these scores use a single customer
# ([:48,1,1]) rather than all customers -- confirm this is intentional.
testScore = math.sqrt(mean_squared_error(testY[:48,1,1], Seq_test[:48,1,1]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:48,1,1], Seq_test[:48,1,1]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmaps and line plot of actual vs prediction, as in the cluster-0 section.
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for weekday cluster 1 with decaying learning rate.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
model = Sequential()
model.add(LSTM(70, activation='relu', input_shape =(trainX.shape[1],trainX.shape[2]), return_sequences = True))
model.add(Dropout(.2))
model.add(Dense(trainX.shape[2]))
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9), 'optimizers.Adam(lr=0.01)'
model.compile(optimizer = 'adam', metrics = 'mae', loss='mse')
model.summary()
# fit model
history_C2 = model.fit(trainX,trainY, epochs=30,
                       batch_size = 32,
                       validation_split = 0.10, callbacks=[lr_decay])
# Training vs validation loss curves.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_C2.history['loss'], label='train')
plt.plot(history_C2.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
# Predictions and test scores (output timestep 1, all customers).
testingtrain_C2 = model.predict(trainX, verbose = 1)
print(testingtrain_C2[0], testingtrain_C2.shape)
testingtest_C2 = model.predict(testX, verbose = 1)
print(testingtest_C2[0], testingtest_C2.shape)
#trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_C2[:,1,:]))
#print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C2[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_C3[:,1,:]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_C2[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmaps and actual-vs-prediction line plot (customer 1, step 1).
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(testingtest_C2[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,1], marker='.', label="actual")
plt.plot(aa, testingtest_C2[:,1,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Forecasting prep for weekday cluster 2 (same pipeline as clusters 0 and 1).
X_wk_C3 = X_wk_C3.drop(columns = 'cluster')
X_wk_C3 = X_wk_C3.transpose()
X_wk_C3.head()
X_wk_C3 = X_wk_C3.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap extreme values at the 97th percentile, then 80/20 chronological split.
cap = np.percentile(X_wk_C3, 97)
X_wk_C3[X_wk_C3 > cap] = cap
training_size=int(X_wk_C3.shape[0]*0.80)
test_size=(X_wk_C3.shape[0])-training_size
train,test=X_wk_C3[0:training_size],X_wk_C3[training_size:(X_wk_C3.shape[0])]
# Duplicate re-definition of get_batches, identical to the ones above.
def get_batches(data, input_interval, target_interval, output_step_offset):
    batched_data_x = []
    batched_data_y = []
    for i in range(input_interval, (len(data) - target_interval - output_step_offset)):
        batched_data_x.append(data[i-input_interval:i])
        batched_data_y.append(data[i+output_step_offset:i+target_interval+output_step_offset])
    batched_data_x, batched_data_y = np.array(batched_data_x), np.array(batched_data_y)
    return batched_data_x, batched_data_y
# Windows for weekday cluster 2 (48/48/48).
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense baseline for weekday cluster 2.
Model_3 = models.Sequential()
Model_3.add(layers.Dense(600, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_3.add(Dropout(0.5))
Model_3.add(BatchNormalization())
Model_3.add(layers.Dense(300, activation='relu'))
Model_3.add(Dropout(0.5))
Model_3.add(BatchNormalization())
Model_3.add((Dense(trainX.shape[2])))
Model_3.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_3.summary()
model_train = Model_3.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size = 64)
Seq_train = Model_3.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_3.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# Test scores at output timestep 1, all customers.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmaps and actual-vs-prediction line plot.
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for weekday cluster 2 with decaying learning rate.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape =(trainX.shape[1],trainX.shape[2]), return_sequences = True))
#model.add(Dropout(.01))
model.add(Dense(trainX.shape[2]))
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9), 'optimizers.Adam(lr=0.01)'
model.compile(optimizer = optimizers.Adam(lr=0.001), metrics = 'mae', loss='mse')
model.summary()
#, callbacks=[lr_decay]
# fit model
history_C3 = model.fit(trainX,trainY, epochs=30, batch_size = 64,
                       validation_split = 0.10, callbacks=[lr_decay])
# Training vs validation loss curves.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_C3.history['loss'], label='train')
plt.plot(history_C3.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
# Predictions and train/test scores (output timestep 1, all customers).
testingtrain_C3 = model.predict(trainX, verbose = 1)
print(testingtrain_C3[0], testingtrain_C3.shape)
testingtest_C3 = model.predict(testX, verbose = 1)
print(testingtest_C3[0], testingtest_C3.shape)
trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_C3[:,1,:]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C3[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_C3[:,1,:]))
print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_C3[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmaps and actual-vs-prediction line plot (customer 0, step 1).
plt.imshow(trainY[:47,47,:])
plt.show()
plt.imshow(testingtrain_C3[:47,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_C3[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
#################. WEEKEND ##################################
%%time
# Refit weekend KMeans with k = 4.
# NOTE(review): X_wkn already has the 'cluster' column appended earlier, so
# this refit includes the previous labels as a feature -- likely unintended;
# consider fitting on X_wkn.drop(columns='cluster').
km_4_wkn = KMeans(4).fit(X_wkn)
pd.Series.value_counts(km_4_wkn.labels_)
cls_list = km_4_wkn.labels_
X_wkn['cluster'] = cls_list
# Per-cluster customer subsets (weekend).
X_wkn_C1 = X_wkn[X_wkn.cluster == 0]
X_wkn_C2 = X_wkn[X_wkn.cluster == 1]
X_wkn_C3 = X_wkn[X_wkn.cluster == 2]
X_wkn_C4 = X_wkn[X_wkn.cluster == 3]
# Forecasting prep for weekend cluster 0 (same pipeline as the weekday runs).
X_wkn_C1 = X_wkn_C1.drop(columns = 'cluster')
X_wkn_C1 = X_wkn_C1.transpose()
X_wkn_C1.head()
X_wkn_C1 = X_wkn_C1.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap extreme values at the 97th percentile, then 80/20 chronological split.
cap = np.percentile(X_wkn_C1, 97)
X_wkn_C1[X_wkn_C1 > cap] = cap
training_size=int(X_wkn_C1.shape[0]*0.80)
test_size=(X_wkn_C1.shape[0])-training_size
train,test=X_wkn_C1[0:training_size],X_wkn_C1[training_size:(X_wkn_C1.shape[0])]
# Duplicate re-definition of get_batches, identical to the ones above.
def get_batches(data, input_interval, target_interval, output_step_offset):
    batched_data_x = []
    batched_data_y = []
    for i in range(input_interval, (len(data) - target_interval - output_step_offset)):
        batched_data_x.append(data[i-input_interval:i])
        batched_data_y.append(data[i+output_step_offset:i+target_interval+output_step_offset])
    batched_data_x, batched_data_y = np.array(batched_data_x), np.array(batched_data_y)
    return batched_data_x, batched_data_y
# Windows for weekend cluster 0 (48/48/48).
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense baseline for weekend cluster 0.  Note: Model_1 is reused/overwritten
# from the weekday section.
Model_1 = models.Sequential()
Model_1.add(layers.Dense(400, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_1.add(Dropout(0.2))
Model_1.add(BatchNormalization())
# input_shape on this hidden layer is redundant (Keras infers it).
Model_1.add(layers.Dense(200, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_1.add(Dropout(0.2))
Model_1.add(BatchNormalization())
Model_1.add((Dense(trainX.shape[2])))
Model_1.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_1.summary()
model_train = Model_1.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size = 64)
Seq_train = Model_1.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_1.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
# Test scores at output timestep 1, all customers.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Heatmaps and actual-vs-prediction line plot.
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Exponential learning-rate decay: 0.001 * 0.8^epoch.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
# Single-layer LSTM model (return_sequences so Dense maps every time step).
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dense(trainX.shape[2]))
# FIX: pass metrics as a list — consistent with the metrics=['mae'] used by
# every Model_*.compile() in this file; several Keras versions reject a bare string.
model.compile(optimizer='adam', metrics=['mae'], loss='mse')
model.summary()
# fit model
history_C1 = model.fit(trainX, trainY, epochs=30,
                       validation_split=0.10, batch_size=64, callbacks=[lr_decay])
# Training vs. validation loss curves.
fig = plt.figure(figsize=(5, 3), dpi=75)  # set figure size
plt.plot(history_C1.history['loss'], label='train')
plt.plot(history_C1.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
# Evaluate the cluster-1 LSTM on train and test windows.
testingtrain_C1 = model.predict(trainX, verbose=1)
print(testingtrain_C1[0], testingtrain_C1.shape)
testingtest_C1 = model.predict(testX, verbose=1)
print(testingtest_C1[0], testingtest_C1.shape)
# RMSE at forecast step 1 across all series.
trainScore = math.sqrt(mean_squared_error(trainY[:, 1, :], testingtrain_C1[:, 1, :]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:, 1, :], testingtest_C1[:, 1, :]))
print('Test Score: %.2f RMSE' % (testScore))
# FIX: MAE computed inline — the `mae` helper is defined only later in this
# script, so calling it here raised NameError when executed top-to-bottom.
trainMAE = np.mean(np.abs(trainY[:, 1, :] - testingtrain_C1[:, 1, :]))
print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(np.abs(testY[:, 1, :] - testingtest_C1[:, 1, :]))
print('Test Score: %.2f MAE' % (testMAE))
def rmse(actual, pred):
    """Root-mean-squared error between `actual` and `pred` arrays."""
    squared_err = (pred - actual) ** 2
    return np.sqrt(squared_err.mean())
def mae(actual, pred):
    """Mean absolute error between `actual` and `pred` arrays."""
    abs_err = np.abs(actual - pred)
    return np.mean(abs_err)
# Heat-maps: actual vs. predicted training windows for the cluster-1 LSTM.
plt.imshow(trainY[:47,47,:])
plt.show()
plt.imshow(testingtrain_C1[:47,47,:])
plt.show()
# Line plot of actual vs. predicted test values (step 1, first series).
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_C1[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Prepare cluster-2 weekend data: drop the cluster label, transpose so rows
# are time steps, and take the raw numpy values.
# NOTE(review): X_wkn_C2 is created earlier in the notebook — not visible here.
X_wkn_C2 = X_wkn_C2.drop(columns = 'cluster')
X_wkn_C2 = X_wkn_C2.transpose()
X_wkn_C2.head()
X_wkn_C2 = X_wkn_C2.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap values above the 97th percentile to limit outlier influence.
cap = np.percentile(X_wkn_C2, 97)
X_wkn_C2[X_wkn_C2 > cap] = cap
# 80/20 chronological split, then 48-step windows.
training_size=int(X_wkn_C2.shape[0]*0.80)
test_size=(X_wkn_C2.shape[0])-training_size
train,test=X_wkn_C2[0:training_size],X_wkn_C2[training_size:(X_wkn_C2.shape[0])]
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense feed-forward model for cluster-2 weekend data (heavier dropout: 0.5).
Model_2 = models.Sequential()
Model_2.add(layers.Dense(400, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_2.add(Dropout(0.5))
Model_2.add(BatchNormalization())
Model_2.add(layers.Dense(200, activation='relu'))
Model_2.add(Dropout(0.5))
Model_2.add(BatchNormalization())
Model_2.add((Dense(trainX.shape[2])))
# `lr` is the legacy Keras argument name (newer releases use `learning_rate`).
Model_2.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_2.summary()
model_train = Model_2.fit(trainX,trainY, epochs=30, batch_size = 32, validation_split = 0.10)
Seq_train = Model_2.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_2.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# Scores on the first 48 windows, step 1, series 1 only.
testScore = math.sqrt(mean_squared_error(testY[:48,1,1], Seq_test[:48,1,1]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:48,1,1], Seq_test[:48,1,1]))
print('Test Score: %.2f MAE' % (testMAE))
# Visual checks: heat-maps and a step-47 trace for series 1.
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for cluster-2 weekend data.
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(trainX.shape[2]))
# FIX: pass metrics as a list — consistent with the metrics=['mae'] used by
# every Model_*.compile() in this file; several Keras versions reject a bare string.
model.compile(optimizer='adam', metrics=['mae'], loss='mse')
model.summary()
# fit model
history_C2 = model.fit(trainX, trainY, epochs=30, batch_size=32,
                       validation_split=0.10, callbacks=[lr_decay])
# Training vs. validation loss curves for the cluster-2 LSTM.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_C2.history['loss'], label='train')
plt.plot(history_C2.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
testingtrain_C2 = model.predict(trainX, verbose = 1)
print(testingtrain_C2[0], testingtrain_C2.shape)
testingtest_C2 = model.predict(testX, verbose = 1)
print(testingtest_C2[0], testingtest_C2.shape)
# RMSE/MAE at forecast step 1 across all series.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C2[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
testMAE = np.mean(mae(testY[:,1,:], testingtest_C2[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
plt.imshow(trainY[:47,47,:])
plt.show()
plt.imshow(testingtrain_C2[:47,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,1], marker='.', label="actual")
plt.plot(aa, testingtest_C2[:,1,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Prepare cluster-3 weekend data (same pipeline as cluster 2).
X_wkn_C3 = X_wkn_C3.drop(columns = 'cluster')
X_wkn_C3 = X_wkn_C3.transpose()
X_wkn_C3.head()
X_wkn_C3 = X_wkn_C3.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
# Cap values above the 97th percentile.
cap = np.percentile(X_wkn_C3, 97)
X_wkn_C3[X_wkn_C3 > cap] = cap
# 80/20 chronological split, then 48-step windows.
training_size=int(X_wkn_C3.shape[0]*0.80)
test_size=(X_wkn_C3.shape[0])-training_size
train,test=X_wkn_C3[0:training_size],X_wkn_C3[training_size:(X_wkn_C3.shape[0])]
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense feed-forward model for cluster-3 weekend data.
Model_3 = models.Sequential()
Model_3.add(layers.Dense(400, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_3.add(Dropout(0.2))
Model_3.add(BatchNormalization())
Model_3.add(layers.Dense(200, activation='relu'))
Model_3.add(Dropout(0.2))
Model_3.add(BatchNormalization())
Model_3.add((Dense(trainX.shape[2])))
# `lr` is the legacy Keras argument name (newer releases use `learning_rate`).
Model_3.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_3.summary()
model_train = Model_3.fit(trainX,trainY, epochs=30, batch_size = 64, validation_split = 0.10)
Seq_train = Model_3.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_3.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# RMSE/MAE at forecast step 1 across all series.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
plt.imshow(testY[:48,47,:])
plt.show()
plt.imshow(Seq_test[:48,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for cluster-3 weekend data.
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dense(trainX.shape[2]))
# FIX: metrics as a list — matches every Model_*.compile() in this file;
# several Keras versions reject a bare string.  `lr` is the legacy name.
model.compile(optimizer=optimizers.Adam(lr=0.001), metrics=['mae'], loss='mse')
model.summary()
# fit model
history_C3 = model.fit(trainX, trainY, epochs=30,
                       validation_split=0.10, callbacks=[lr_decay],
                       batch_size=64)
# Training vs. validation loss curves for the cluster-3 LSTM.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_C3.history['loss'], label='train')
plt.plot(history_C3.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
testingtrain_C3 = model.predict(trainX, verbose = 1)
print(testingtrain_C3[0], testingtrain_C3.shape)
testingtest_C3 = model.predict(testX, verbose = 1)
print(testingtest_C3[0], testingtest_C3.shape)
# Train/test RMSE and MAE at forecast step 1 across all series.
trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_C3[:,1,:]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C3[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_C3[:,1,:]))
print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_C3[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
# Visual checks: heat-maps and a step-1 trace.
plt.imshow(trainY[:47,47,:])
plt.show()
plt.imshow(testingtrain_C3[:47,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_C3[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# Prepare cluster-4 weekend data: drop the cluster label, transpose so rows
# are time steps, take raw numpy values.
X_wkn_C4 = X_wkn_C4.drop(columns='cluster')
X_wkn_C4 = X_wkn_C4.transpose()
X_wkn_C4.head()
X_wkn_C4 = X_wkn_C4.values
# Cap outliers at the 95th percentile (this cluster uses 95, others use 97).
cap = np.percentile(X_wkn_C4, 95)
X_wkn_C4[X_wkn_C4 > cap] = cap
# FIX: X_wkn_C4 is a numpy ndarray after `.values`, so the pandas-only
# `.describe()` raised AttributeError; wrap in a DataFrame for the summary.
print(pd.DataFrame(X_wkn_C4).describe())
# 80/20 chronological split, then 48-step windows.
training_size = int(X_wkn_C4.shape[0] * 0.80)
test_size = (X_wkn_C4.shape[0]) - training_size
train, test = X_wkn_C4[0:training_size], X_wkn_C4[training_size:(X_wkn_C4.shape[0])]
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
# Smaller dense model for cluster-4 weekend data (50-unit layers).
Model_4 = models.Sequential()
Model_4.add(layers.Dense(50, activation='relu', input_shape=(trainX.shape[1],trainX.shape[2])))
Model_4.add(Dropout(0.2))
Model_4.add(BatchNormalization())
Model_4.add(layers.Dense(50, activation='relu'))
Model_4.add(Dropout(0.2))
Model_4.add(BatchNormalization())
Model_4.add((Dense(trainX.shape[2])))
# `lr` is the legacy Keras argument name (newer releases use `learning_rate`).
Model_4.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_4.summary()
model_train = Model_4.fit(trainX,trainY, epochs=30, batch_size = 32, validation_split = 0.10)
Seq_train = Model_4.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_4.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
def rmse(actual, pred):
    """Root-mean-squared error (duplicate redefinition kept from the notebook flow)."""
    return np.sqrt(np.mean(np.square(pred - actual)))
def mae(actual, pred):
    """Mean absolute error (duplicate redefinition kept from the notebook flow)."""
    return np.abs(actual - pred).mean()
# Cluster-4 dense model evaluation: step-1 RMSE/MAE, then visual checks.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
plt.imshow(testY[:47,47,:])
plt.show()
plt.imshow(Seq_test[:47,47,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for cluster-4 weekend data.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dense(trainX.shape[2]))
# FIX: metrics as a list — matches every Model_*.compile() in this file;
# several Keras versions reject a bare string.  `lr` is the legacy name.
model.compile(optimizer=optimizers.Adam(lr=0.001), metrics=['mae'], loss='mse')
model.summary()
# fit model
history_C4 = model.fit(trainX, trainY, epochs=30, batch_size=32,
                       validation_split=0.10, callbacks=[lr_decay])
# Training vs. validation loss curves for the cluster-4 LSTM.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_C4.history['loss'], label='train')
plt.plot(history_C4.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
testingtrain_C4 = model.predict(trainX, verbose = 1)
print(testingtrain_C4[0], testingtrain_C4.shape)
testingtest_C4 = model.predict(testX, verbose = 1)
print(testingtest_C4[0], testingtest_C4.shape)
# MAE/RMSE at forecast step 1 across all series.
testMAE = np.mean(mae(testY[:,1,:], testingtest_C4[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_C4[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
plt.imshow(testY[:47,1,:])
plt.show()
plt.imshow(testingtest_C4[:47,1,:])
plt.show()
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_C4[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# All-weekday data (no clustering): same pipeline, no outlier capping here.
#X_wk_C1 = X_wk_C1.drop(columns = 'cluster')
X_wk = X_wk.transpose()
X_wk.head()
X_wk = X_wk.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
#cap = np.percentile(X_wk_C1, 97)
#X_wk_C1[X_wk_C1 > cap] = cap
# 80/20 chronological split, then 48-step windows.
training_size=int(X_wk.shape[0]*0.80)
test_size=(X_wk.shape[0])-training_size
train,test=X_wk[0:training_size],X_wk[training_size:(X_wk.shape[0])]
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Dense feed-forward model for all weekday data (very light dropout: 0.01).
Model_wk = models.Sequential()
Model_wk.add(layers.Dense(400, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2])))
Model_wk.add(Dropout(.01))
Model_wk.add(BatchNormalization())
Model_wk.add(Dense(200))
Model_wk.add(Dropout(.01))
Model_wk.add(BatchNormalization())
Model_wk.add((Dense(trainX.shape[2])))
# `lr` is the legacy Keras argument name (newer releases use `learning_rate`).
Model_wk.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_wk.summary()
model_train = Model_wk.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size=64)
Seq_train = Model_wk.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_wk.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# RMSE/MAE at forecast step 1 across all series.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,1,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for the full weekday data set.
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dense(trainX.shape[2]))
# FIX: metrics as a list — matches every Model_*.compile() in this file;
# several Keras versions reject a bare string.  `lr` is the legacy name.
model.compile(optimizer=optimizers.Adam(lr=0.001), metrics=['mae'], loss='mse')
model.summary()
# Re-create the exponential learning-rate decay schedule: 0.001 * 0.8^epoch.
lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * (0.80 ** epoch))
# fit model
history_wk = model.fit(trainX, trainY, epochs=20,
                       validation_split=0.10,
                       batch_size=64,
                       callbacks=[lr_decay])
# Training vs. validation loss curves for the all-weekday LSTM.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_wk.history['loss'], label='train')
plt.plot(history_wk.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
testingtrain_wk = model.predict(trainX, verbose = 1)
print(testingtrain_wk[0], testingtrain_wk.shape)
testingtest_wk = model.predict(testX, verbose = 1)
print(testingtest_wk[0], testingtest_wk.shape)
# Train/test RMSE and MAE at forecast step 1 across all series.
trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_wk[:,1,:]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_wk[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_wk[:,1,:]))
print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_wk[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,:1], marker='.', label="actual")
plt.plot(aa, testingtest_wk[:,1,:1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# All-weekend data: same pipeline as the weekday set, no outlier capping.
X_wkn = X_wkn.transpose()
X_wkn.head()
X_wkn = X_wkn.values
########### Ranging the values from 0 to 1
#scaler = MinMaxScaler(feature_range=(0, 1))
#Xtrain = scaler.fit(Xtrain.reshape(0, 1))
#X_wk_C1 = scaler.fit_transform(X_wk_C1)
#####Avoiding Outliers
#cap = np.percentile(X_wk_C1, 97)
#X_wk_C1[X_wk_C1 > cap] = cap
# 80/20 chronological split, then 48-step windows.
training_size=int(X_wkn.shape[0]*0.80)
test_size=(X_wkn.shape[0])-training_size
train,test=X_wkn[0:training_size],X_wkn[training_size:(X_wkn.shape[0])]
trainX, trainY = get_batches(train, 48, 48, 48)
testX, testY = get_batches(test, 48, 48, 48)
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)
###Building a sequential network:
# Larger dense feed-forward model for all weekend data (600/300 units).
Model_wkn = models.Sequential()
Model_wkn.add(layers.Dense(600, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2])))
Model_wkn.add(Dropout(.2))
Model_wkn.add(BatchNormalization())
Model_wkn.add(Dense(300))
Model_wkn.add(Dropout(.2))
Model_wkn.add(BatchNormalization())
Model_wkn.add((Dense(trainX.shape[2])))
# `lr` is the legacy Keras argument name (newer releases use `learning_rate`).
Model_wkn.compile(optimizer= optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
Model_wkn.summary()
model_train = Model_wkn.fit(trainX,trainY, epochs=30, validation_split = 0.10, batch_size=64)
Seq_train = Model_wkn.predict(trainX, verbose = 1)
print(Seq_train[0], Seq_train.shape)
Seq_test = Model_wkn.predict(testX, verbose = 1)
print(Seq_test[0], Seq_test.shape)
#trainScore = math.sqrt(mean_squared_error(trainY_RMSE, testingtrain_C1))
#print('Train Score: %.2f RMSE' % (trainScore))
# RMSE/MAE at forecast step 1 across all series.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:48,1,1], testingtrain_C2[:48,1,1]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], Seq_test[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,47,1], marker='.', label="actual")
plt.plot(aa, Seq_test[:,47,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()
# LSTM model for the full weekend data set.
model = Sequential()
model.add(LSTM(300, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(Dropout(.2))
model.add(Dense(trainX.shape[2]))
# FIX: metrics as a list — matches every Model_*.compile() in this file;
# several Keras versions reject a bare string.  `lr` is the legacy name.
model.compile(optimizer=optimizers.Adam(lr=0.001), metrics=['mae'], loss='mse')
model.summary()
# fit model
history_wkn = model.fit(trainX, trainY, epochs=20,
                        batch_size=64,
                        validation_split=0.10,
                        callbacks=[lr_decay])
# Training vs. validation loss curves for the all-weekend LSTM.
fig = plt.figure(figsize=(5,3), dpi=75) #set figure size
plt.plot(history_wkn.history['loss'], label='train')
plt.plot(history_wkn.history['val_loss'], label='val')
plt.ylabel('Pérdida')
plt.xlabel('Epoch')
plt.legend()
plt.show()
testingtrain_wkn = model.predict(trainX, verbose = 1)
print(testingtrain_wkn[0], testingtrain_wkn.shape)
testingtest_wkn = model.predict(testX, verbose = 1)
print(testingtest_wkn[0], testingtest_wkn.shape)
#trainScore = math.sqrt(mean_squared_error(trainY[:,1,:], testingtrain_C2[:,1,:]))
#print('Train Score: %.2f RMSE' % (trainScore))
# RMSE/MAE at forecast step 1 across all series.
testScore = math.sqrt(mean_squared_error(testY[:,1,:], testingtest_wkn[:,1,:]))
print('Test Score: %.2f RMSE' % (testScore))
#trainMAE = np.mean(mae(trainY[:,1,:], testingtrain_C3[:,1,:]))
#print('Train Score: %.2f MAE' % (trainMAE))
testMAE = np.mean(mae(testY[:,1,:], testingtest_wkn[:,1,:]))
print('Test Score: %.2f MAE' % (testMAE))
aa=[x for x in range(testY.shape[0])]
plt.figure(figsize=(20,5))
plt.plot(aa, testY[:,1,1], marker='.', label="actual")
plt.plot(aa, testingtest_wkn[:,1,1], 'r', label="prediction")
plt.ylabel('Energy', size=15)
plt.xlabel('Time step', size=15)
plt.legend(fontsize=15)
plt.show()